from pyspark import SparkConf, SparkContext
import os

# Point PySpark at the local Python interpreter used by worker processes.
# NOTE(review): machine-specific Windows path — adjust for your environment.
os.environ['PYSPARK_PYTHON'] = "D:/soft/python/python-file/python.exe"

# Configure and start a local Spark application (uses all local cores).
# (The original comment claimed this was a MySQL connection — it is not.)
conf = SparkConf().setMaster("local[*]").setAppName("test_spark_app")
sc = SparkContext(conf=conf)
# print(sc.version)

# Build RDDs from several Python container types.
rdd1 = sc.parallelize([1, 2, 3, 4, 5])   # list
rdd2 = sc.parallelize((1, 2, 3, 4, 5))   # tuple
rdd3 = sc.parallelize("abcdefg")         # string -> one element per character
rdd4 = sc.parallelize({1, 2, 3, 4, 5})   # set -> element order is NOT guaranteed
print(rdd1.collect())
print(rdd2.collect())
print(rdd3.collect())
print(rdd4.collect())

# Chain two map transformations: each element x -> x * 10 -> (x * 10) + 5.
rdd5 = rdd1.map(lambda x: x * 10).map(lambda x: x + 5)
print(rdd5.collect())
sc.stop()

# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pyspark
